pacman::p_load(
rio, # import funcs
sf, # work with spatial data
here, # create relative paths
janitor, # data cleaning
lubridate, # date handling
tidyverse # data science
)
conflicted::conflict_prefer("select", "dplyr")
conflicted::conflict_prefer("filter", "dplyr")
# linelist
dat_raw <- import(here::here("data", "final", "msf_linelist_moissala_2023-06-11.xlsx")) |> as.tibble()
# lab data
lab_raw <- import(here::here("data", "final", "msf_laboratory_moissala_2023-06-11.xlsx"))
# admin data
admin_1 <- st_read(here::here("data", "gpkg", "GEO-EXPORT-TCD-2024-04-11.gpkg"), layer = "ADM1")
## Reading layer `ADM1' from data source
## `/Users/hugzsoubrier/GitHub/fake-data/data/gpkg/GEO-EXPORT-TCD-2024-04-11.gpkg'
## using driver `GPKG'
## Simple feature collection with 23 features and 8 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: 13.47348 ymin: 7.441069 xmax: 24.00269 ymax: 23.45037
## Geodetic CRS: WGS 84
admin_2 <- st_read(here::here("data", "gpkg", "GEO-EXPORT-TCD-2024-04-11.gpkg"), layer = "ADM2")
## Reading layer `ADM2' from data source
## `/Users/hugzsoubrier/GitHub/fake-data/data/gpkg/GEO-EXPORT-TCD-2024-04-11.gpkg'
## using driver `GPKG'
## Simple feature collection with 132 features and 9 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: 13.47348 ymin: 7.441069 xmax: 24.00269 ymax: 23.45037
## Geodetic CRS: WGS 84
This document provides directions for the analysis of the fake
Measles dataset msf_linelist_moissala_2023-09-24.xlsx and
the its corresponding laboratory dataset
msf_laboratory_moissala_2023-09-24.xlsx.
For measles outbreaks, it makes sense to use the following age classification:
0 - 11 months< 6 months6 - 8 months9 - 11 months1 - 4 years5 - 14 years15+ yearsMUAC is classified as follow:
Green (125+ mm)Yellow (115 - 124 mm)Red (<115 mm)confirmed: cases with a positive PCR result in lab data
probable: cases with fever,
coughand a rash suspected: all
other cases
dat <- dat_raw |>
# standardise variable names
janitor::clean_names() |>
# manually rename
rename(
id = epi_id_number,
sex = sex_patient,
age_unit = age_units_months_years,
date_onset = date_of_onset_of_symptoms,
hospitalisation = hospitalisation_yes_no,
date_admission = date_of_admission_in_structure,
date_outcome = death_outcome_death_recovered_lama,
sub_prefecture = sub_prefecture_of_residence,
region = region_of_residence,
fever = participant_had_fever,
rash = participant_had_rash,
cough = participant_had_cough,
red_eye = participant_had_red_eye,
pneumonia = participant_had_pneumonia,
encephalitis = participant_had_encephalitis,
muac = middle_upper_arm_circumference_muac,
vacc_status = vaccination_status,
vacc_doses = vaccination_dosage,
outcome = patient_outcome,
site = msf_site,
malaria_rdt = malaria_rdt
) |>
# recoding
mutate(
sex = case_when(
sex %in% c("f", "femme") ~ "female",
sex %in% c("m", "homme") ~ "male",
.default = sex
),
across(contains("date_"), ~ ymd(.x)),
across(c(fever, rash, cough, red_eye, pneumonia, encephalitis), ~ case_match(.x, "Yes" ~ TRUE, "No" ~ FALSE, .default = NA))
) |>
# Categorise variables
mutate(
age_group = case_when(
age_unit == "months" & age < 6 ~ "< 6 months",
age_unit == "months" & between(age, 6, 8) ~ "6 - 8 months",
age_unit == "months" & between(age, 9, 11) ~ "9 - 11 months",
age_unit == "years" & between(age, 1, 4) ~ "1 - 4 years",
age_unit == "years" & between(age, 5, 14) ~ "5 - 14 years",
age_unit == "years" & between(age, 15, 40) ~ "15+ years"
),
age_group = fct_relevel(
age_group,
c(
"< 6 months",
"6 - 8 months",
"9 - 11 months",
"1 - 4 years",
"5 - 14 years",
"15+ years"
)
),
muac_cat = case_when(
muac >= 125 ~ "Green (125+ mm)",
between(muac, 115, 124) ~ "Yellow (115 - 124 mm)",
muac < 115 ~ "Red (<115 mm)"
)
) |>
relocate(
age_group,
.after = age_unit
) |>
relocate(muac_cat, .after = muac) |>
filter(date_onset >= "2022-01-01")
lab_clean <- lab_raw |>
clean_names() |>
rename(
case_id = msf_number_id,
lab_id = laboratory_id,
date_test = date_of_the_test,
test_result = final_test_result
) |>
mutate(
date_test = ymd(date_test),
ct_value = round(ct_value, digits = 1)
)
There are some duplicates in the laboratory results. Some
case_id were tested multiple times if there was a
inconclusive test_result. We need to find
them, and take the last sample tested
reactable::reactable(lab_clean |> get_dupes(case_id))
lab_clean <- lab_clean |>
filter(
.by = case_id,
date_test == max(date_test, na.rm = TRUE)
)
Some samples were negative, so these cases are not cases and need to be removed from analysis
lab_clean |> count(test_result)
## test_result n
## 1 negative 62
## 2 positive 481
We join the lab_clean to the main linelists using the
case_id as key. Then remove the negative case, and create
an epidemiological classification
dat <- left_join(dat, lab_clean, by = c("id" = "case_id"))
dat <- dat |>
filter(is.na(test_result) | test_result == "positive") |>
mutate(
epi_cat = case_when(
test_result == "positive" ~ "confirmed",
rash == TRUE & fever == TRUE & cough == TRUE ~ "probable",
.default = "suspected"
),
epi_cat = fct_relevel(epi_cat, c("confirmed", "probable", "suspected"))
)
reactable::reactable(dat |> tabyl(epi_cat) |> mutate(percent = round(percent * 100, digits = 2)))
dat |>
select(
sex,
age_group,
muac_cat,
vacc_status
) |>
gtsummary::tbl_summary(label = list(
sex ~ "Gender",
age_group ~ "Age groups",
muac_cat ~ "MUAC category",
vacc_status = "Vaccination status",
malaria_rdt = "Malaria RDT",
outcome = "Outcome"
))
| Characteristic | N = 2,7881 |
|---|---|
| Gender | |
| female | 1,428 (51%) |
| male | 1,360 (49%) |
| Age groups | |
| < 6 months | 145 (5.4%) |
| 6 - 8 months | 302 (11%) |
| 9 - 11 months | 276 (10%) |
| 1 - 4 years | 1,274 (47%) |
| 5 - 14 years | 528 (20%) |
| 15+ years | 167 (6.2%) |
| Unknown | 96 |
| MUAC category | |
| Green (125+ mm) | 2,038 (73%) |
| Red (<115 mm) | 254 (9.1%) |
| Yellow (115 - 124 mm) | 496 (18%) |
| Vaccination status | |
| No | 1,503 (64%) |
| Uncertain | 440 (19%) |
| Yes - card | 22 (0.9%) |
| Yes - oral | 395 (17%) |
| Unknown | 428 |
| 1 n (%) | |
dat |>
select(
sex,
age_group,
muac_cat,
vacc_status,
site
) |>
gtsummary::tbl_summary(
by = site,
label = list(
sex ~ "Gender",
age_group ~ "Age groups",
muac_cat ~ "MUAC category",
vacc_status = "Vaccination status",
malaria_rdt = "Malaria RDT",
outcome = "Outcome"
)
)
| Characteristic | Bedaya Hospital, N = 4321 | Bekourou Hospital, N = 1581 | Bouna Hospital, N = 5141 | Danamadji Hospital, N = 1481 | Koumogo Hospital, N = 51 | Moïssala Hospital, N = 1,5311 |
|---|---|---|---|---|---|---|
| Gender | ||||||
| female | 215 (50%) | 81 (51%) | 251 (49%) | 76 (51%) | 2 (40%) | 803 (52%) |
| male | 217 (50%) | 77 (49%) | 263 (51%) | 72 (49%) | 3 (60%) | 728 (48%) |
| Age groups | ||||||
| < 6 months | 23 (5.5%) | 9 (5.8%) | 23 (4.6%) | 9 (6.4%) | 0 (0%) | 81 (5.5%) |
| 6 - 8 months | 58 (14%) | 18 (12%) | 54 (11%) | 15 (11%) | 0 (0%) | 157 (11%) |
| 9 - 11 months | 42 (10%) | 16 (10%) | 59 (12%) | 17 (12%) | 0 (0%) | 142 (9.6%) |
| 1 - 4 years | 185 (44%) | 78 (51%) | 234 (47%) | 62 (44%) | 3 (60%) | 712 (48%) |
| 5 - 14 years | 85 (20%) | 23 (15%) | 89 (18%) | 32 (23%) | 2 (40%) | 297 (20%) |
| 15+ years | 23 (5.5%) | 10 (6.5%) | 37 (7.5%) | 6 (4.3%) | 0 (0%) | 91 (6.1%) |
| Unknown | 16 | 4 | 18 | 7 | 0 | 51 |
| MUAC category | ||||||
| Green (125+ mm) | 329 (76%) | 113 (72%) | 381 (74%) | 101 (68%) | 5 (100%) | 1,109 (72%) |
| Red (<115 mm) | 35 (8.1%) | 16 (10%) | 42 (8.2%) | 16 (11%) | 0 (0%) | 145 (9.5%) |
| Yellow (115 - 124 mm) | 68 (16%) | 29 (18%) | 91 (18%) | 31 (21%) | 0 (0%) | 277 (18%) |
| Vaccination status | ||||||
| No | 241 (65%) | 92 (67%) | 272 (63%) | 77 (63%) | 1 (50%) | 820 (63%) |
| Uncertain | 55 (15%) | 18 (13%) | 95 (22%) | 24 (20%) | 1 (50%) | 247 (19%) |
| Yes - card | 6 (1.6%) | 2 (1.4%) | 3 (0.7%) | 2 (1.6%) | 0 (0%) | 9 (0.7%) |
| Yes - oral | 67 (18%) | 26 (19%) | 61 (14%) | 20 (16%) | 0 (0%) | 221 (17%) |
| Unknown | 63 | 20 | 83 | 25 | 3 | 234 |
| 1 n (%) | ||||||
dat |>
select(
sex,
age_group,
site
) |>
apyramid::age_pyramid(
age_group = "age_group",
split_by = "sex",
proportional = TRUE,
show_midpoint = TRUE
) +
theme_minimal()
# CFR only on known outcomes
dat |>
summarise(
.by = site,
n_cases = n(),
n_confirmed = sum(epi_cat == "confirmed"),
n_deaths = sum(outcome == "dead", na.rm = TRUE),
cfr = round(digits = 2, n_deaths / sum(outcome %in% c("recovered", "dead")) * 100)
) |>
reactable::reactable(columns = list(
n_cases = reactable::colDef(name = "N cases"),
n_confirmed = reactable::colDef(name = "N confirmed"),
n_deaths = reactable::colDef(name = "N deaths"),
cfr = reactable::colDef(name = "CFR (%)")
))
Investigating age_group, muac_cat and
vacc_status as risks factors for death
# Prepare the data for fitting the logistic regression
prep_logit <- dat |>
# change group order for references
mutate(
age_group = fct_relevel(
age_group,
c(
"15+ years",
"< 6 months",
"6 - 8 months",
"9 - 11 months",
"1 - 4 years",
"5 - 14 years"
)
),
muac_cat = fct_relevel(
muac_cat,
c(
"Green (125+ mm)",
"Yellow (115 - 124 mm)",
"Red (<115 mm)"
)
),
vacc_status = case_match(
vacc_status,
"Yes - card" ~ "Yes",
"Yes - oral" ~ "Yes",
"Uncertain" ~ NA,
.default = vacc_status
),
vacc_status = fct_relevel(
vacc_status,
c(
"No",
"Yes"
)
),
# outcome needs to be 1/0
outcome_binary = case_when(
outcome == "recovered" ~ 0,
outcome == "dead" ~ 1,
.default = NA
)
)
# fit the logistic regression
mdl <- glm(outcome_binary ~ sex + age_group + vacc_status + muac_cat, data = prep_logit, family = "binomial")
# view coeff
gtsummary::tbl_regression(
mdl,
exp = TRUE,
label = list(
sex ~ "Gender",
age_group ~ "Age groups",
muac_cat ~ "MUAC category",
vacc_status = "Vaccination status"
),
intercept = TRUE,
conf.int = TRUE
)
| Characteristic | OR1 | 95% CI1 | p-value |
|---|---|---|---|
| (Intercept) | 0.04 | 0.01, 0.11 | <0.001 |
| Gender | |||
| female | — | — | |
| male | 1.04 | 0.75, 1.45 | 0.8 |
| Age groups | |||
| 15+ years | — | — | |
| < 6 months | 5.67 | 2.01, 20.3 | 0.003 |
| 6 - 8 months | 3.57 | 1.33, 12.5 | 0.022 |
| 9 - 11 months | 3.11 | 1.13, 11.0 | 0.044 |
| 1 - 4 years | 1.99 | 0.79, 6.70 | 0.2 |
| 5 - 14 years | 0.74 | 0.23, 2.82 | 0.6 |
| Vaccination status | |||
| No | — | — | |
| Yes | 0.34 | 0.18, 0.57 | <0.001 |
| MUAC category | |||
| Green (125+ mm) | — | — | |
| Yellow (115 - 124 mm) | 1.71 | 1.12, 2.57 | 0.011 |
| Red (<115 mm) | 2.69 | 1.68, 4.21 | <0.001 |
| 1 OR = Odds Ratio, CI = Confidence Interval | |||
dat |>
mutate(
epiweek = floor_date(date_onset, unit = "week")
) |>
ggplot() +
geom_bar(
aes(
x = epiweek,
fill = epi_cat
),
position = position_stack()
) +
scale_x_date(
breaks = "2 weeks",
date_labels = "%Y -W%W"
) +
scale_fill_manual(
"Epi status",
values = c(
"confirmed" = "#912c2c",
"probable" = "#c4833d",
"suspected" = "#edd598"
)
) +
labs(
x = "Epiweek",
y = "N cases",
title = glue::glue("Epicurve of measle outbreak in Southern Chad"),
subtitle = glue::glue({
"{nrow(dat)} cases observed from {min(dat$date_onset, na.rm = TRUE)} to {max(dat$date_onset, na.rm = TRUE)}"
})
) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, size = 5))
dat |>
mutate(
epiweek = floor_date(date_onset, unit = "week")
) |>
ggplot() +
geom_bar(
aes(
x = epiweek,
fill = site
),
position = position_stack()
) +
scale_x_date(
breaks = "4 weeks",
date_labels = "%Y -W%W"
) +
labs(
x = "Epiweek",
y = "N cases",
title = glue::glue("Epicurve of measle outbreak in Southern Chad"),
subtitle = glue::glue({
"{nrow(dat)} cases observed from {min(dat$date_onset, na.rm = TRUE)} to {max(dat$date_onset, na.rm = TRUE)}"
})
) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, size = 5)) +
facet_wrap(~site) +
gghighlight::gghighlight()
Make a Choropleth map
# clean admin data
dat <- dat |>
mutate(across(c(sub_prefecture, region), ~ str_to_sentence(.x)))
# count cases by adm2
adm_summ <- dat |> summarise(
.by = c(region, sub_prefecture),
n_cases = n(),
n_deaths = sum(outcome == "dead", na.rm = TRUE),
cfr = round(digits = 3, n_deaths / sum(outcome %in% c("recovered", "dead", na.rm = TRUE))),
cfr_lab = scales::percent(cfr, accuracy = .1)
)
# join the count data to the sf
chor_dat <- left_join(
admin_2,
adm_summ,
by = c("adm2_name" = "sub_prefecture")
) |>
# add AR using population data
mutate(
AR = round(digits = 3, n_cases / adm2_pop * 1000),
label = (paste0(
"<b>Region:</b> ",
adm1_name,
"<br><b>Sub-prefecture:</b> ",
adm2_name,
"<br><b>Population:</b> ",
adm2_pop,
"<br><b>Attack Rate:</b> ",
AR,
"<br><b>CFR (%):</b> ",
cfr_lab
))
)
leaf_basemap <- function(
bbox,
baseGroups = c("Light", "OSM", "OSM HOT"),
overlayGroups = c("Boundaries"),
miniMap = TRUE
) {
lf <- leaflet::leaflet() %>%
leaflet::fitBounds(bbox[["xmin"]], bbox[["ymin"]], bbox[["xmax"]], bbox[["ymax"]]) %>%
leaflet::addMapPane(name = "choropleth", zIndex = 310) %>%
leaflet::addMapPane(name = "place_labels", zIndex = 320) %>%
leaflet::addMapPane(name = "circles", zIndex = 410) %>%
leaflet::addMapPane(name = "boundaries", zIndex = 420) %>%
leaflet::addMapPane(name = "geo_highlight", zIndex = 430) %>%
leaflet::addProviderTiles("CartoDB.PositronNoLabels", group = "Light") %>%
leaflet::addProviderTiles(
"CartoDB.PositronOnlyLabels",
group = "Light",
options = leaflet::leafletOptions(pane = "place_labels")
) %>%
leaflet::addProviderTiles("OpenStreetMap", group = "OSM") %>%
leaflet::addProviderTiles("OpenStreetMap.HOT", group = "OSM HOT") %>%
leaflet::addScaleBar(
position = "bottomright",
options = leaflet::scaleBarOptions(imperial = FALSE)
) %>%
leaflet::addLayersControl(
baseGroups = baseGroups,
overlayGroups = overlayGroups,
position = "topleft"
)
if (miniMap) {
lf <- lf %>% leaflet::addMiniMap(toggleDisplay = TRUE, position = "bottomleft")
}
return(lf)
}
bbox <- st_bbox(filter(admin_2, adm1_name == "Mandoul"))
bins <- c(0, 1, 5, 10, 20, Inf)
pal <- leaflet::colorBin("YlOrRd", domain = chor_dat$AR, bins = bins)
labels <- chor_dat$label |> lapply(htmltools::HTML)
leaflet::leaflet() |>
leaf_basemap(bbox, miniMap = TRUE) |>
leaflet::fitBounds(as.character(bbox)[1], as.character(bbox)[2], as.character(bbox)[3], as.character(bbox)[4]) |>
leaflet::addProviderTiles("CartoDB.Positron", group = "Light") |>
leaflet::addScaleBar(position = "bottomright", options = leaflet::scaleBarOptions(imperial = FALSE)) |>
leaflet.extras::addFullscreenControl(position = "topleft") |>
leaflet.extras::addResetMapButton() |>
leaflet::addPolygons(
data = admin_1,
stroke = TRUE,
weight = 1.5,
color = "black",
fill = FALSE,
fillOpacity = 0
) |>
leaflet::addPolygons(
data = chor_dat,
label = ~labels,
stroke = TRUE,
weight = 1.2,
color = "grey",
fillColor = ~ pal(AR),
fillOpacity = 0.3
)